{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# scikit-learn中的PCA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn import datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "digits = datasets.load_digits()\n",
    "X = digits.data\n",
    "y = digits.target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1347, 64)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection._split import train_test_split\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)\n",
    "X_train.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 不降维，直接使用kNN分类看看效果"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 23.4 ms, sys: 10.8 ms, total: 34.2 ms\nWall time: 42.1 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "from sklearn.neighbors.classification import KNeighborsClassifier\n",
    "\n",
    "knn_clf = KNeighborsClassifier()\n",
    "# 手写数字，每个样本的特征应该是每个像素的RGB，度量是一样的，无需归一化\n",
    "knn_clf.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.98666666666666669"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "knn_clf.score(X_test, y_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 试试PCA后，kNN的效果"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 直接降到二维"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.decomposition.pca import PCA\n",
    "\n",
    "pca = PCA(n_components=2)\n",
    "pca.fit(X_train)\n",
    "X_train_reduction = pca.transform(X_train)\n",
    "# 测试数据集也需要降维\n",
    "X_test_reduction = pca.transform(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1.96 ms, sys: 635 µs, total: 2.59 ms\nWall time: 2.02 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "knn_clf = KNeighborsClassifier()\n",
    "knn_clf.fit(X_train_reduction, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.60666666666666669"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "knn_clf.score(X_test_reduction, y_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "时间节省很多，但精度大幅下降  \n",
    "### 应该选择降到什么维度，以保持时间和精度的平衡呢？"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([  1.45668166e-01,   1.37354688e-01,   1.17777287e-01,\n         8.49968861e-02,   5.86018996e-02,   5.11542945e-02,\n         4.26605279e-02,   3.60119663e-02,   3.41105814e-02,\n         3.05407804e-02,   2.42337671e-02,   2.28700570e-02,\n         1.80304649e-02,   1.79346003e-02,   1.45798298e-02,\n         1.42044841e-02,   1.29961033e-02,   1.26617002e-02,\n         1.01728635e-02,   9.09314698e-03,   8.85220461e-03,\n         7.73828332e-03,   7.60516219e-03,   7.11864860e-03,\n         6.85977267e-03,   5.76411920e-03,   5.71688020e-03,\n         5.08255707e-03,   4.89020776e-03,   4.34888085e-03,\n         3.72917505e-03,   3.57755036e-03,   3.26989470e-03,\n         3.14917937e-03,   3.09269839e-03,   2.87619649e-03,\n         2.50362666e-03,   2.25417403e-03,   2.20030857e-03,\n         1.98028746e-03,   1.88195578e-03,   1.52769283e-03,\n         1.42823692e-03,   1.38003340e-03,   1.17572392e-03,\n         1.07377463e-03,   9.55152460e-04,   9.00017642e-04,\n         5.79162563e-04,   3.82793717e-04,   2.38328586e-04,\n         8.40132221e-05,   5.60545588e-05,   5.48538930e-05,\n         1.08077650e-05,   4.01354717e-06,   1.23186515e-06,\n         1.05783059e-06,   6.06659094e-07,   5.86686040e-07,\n         9.18612290e-34,   9.18612290e-34,   9.18612290e-34,\n         8.82949950e-34])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pca = PCA(n_components=np.shape(X_train)[1])\n",
    "pca.fit(X_train)\n",
    "pca.explained_variance_ratio_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### 画个折线图看看保留的维数与精度之间的关系"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvAOZPmwAAHU5JREFUeJzt3Xl4XfV95/H3V/tmLbbkBVmyjPGC7RowitkS1kAMTwbSwLQ4hGZrnOkTUjolTWA6Q1syeSaENJmkQzM1CSXQFpeQNuMmTgwhEBKCwSbYxptk4QXJthYv2rcr3e/8ca9BCNu6tq98dO79vJ7nPvecc3/W/R5z/NHhd37n/MzdERGR1JIRdAEiIpJ8CncRkRSkcBcRSUEKdxGRFKRwFxFJQQp3EZEUpHAXEUlBCncRkRSkcBcRSUFZQX1xeXm519TUBPX1IiKh9Nprrx1y94qx2gUW7jU1NWzcuDGorxcRCSUz25dIO3XLiIikIIW7iEgKUriLiKQghbuISApSuIuIpKAxw93MHjWzVjPbeoLPzcy+Y2YNZrbFzJYmv0wRETkViZy5PwYsP8nnNwJz46+VwHfPvCwRETkTY45zd/cXzazmJE1uAR732Hx9682s1MxmuPvBJNUoIhPQ0HCUvsgwfZFhBiKx5f7IMANDUSJDUQaHo0SGnchwlMhwlKFhZzjqDEWd4WiU4agz7OAe2x51iI6Y9tPdObbqMGL53dsTNoGmFL3u/GlcUFU6rt+RjJuYKoHGEetN8W3vCXczW0ns7J7q6uokfLWIJCIadboHh+jqH6K7f4iu/ghdA++s9wwM0TN47H2YnoEhegeH6RscpmdwiL7B4dh6PMD7I8NEhidOWCbKLOgKYqYW54Ui3BPm7quAVQC1tbXhOzJEAjYwNMyRnkGO9AxytCfCkd5B2ntj6+29ETr7InT2R+jsG4q/R2IBPjiU0IlrfnYmhbmZFORkUZCTSUFOJoU5WZQX5b69npedSf6xV3w99sogPzuTnKwMcjIzyD72nplBZoaRnWlkZWaQlWFkmL39npFB7N0Ms3cC2Iivx2szsxHLsXU5sWSE+36gasT6zPg2ERmDu9M9MMSh7kHaugY41D3w9ntseZBD3QMc7hngaE+E7oGhE/6sSXlZlORnU5yXTXF+FtWTCyjOz2ZSXhaT8rIpzsuiKDe2PCkvi6K8rPi2bIryssjPziQzQ4GZKpIR7muAu8xsNXAJ0KH+dhEYjjptXQPsb+/jQHsfBzv6aOkcoLmzn9bOflo6B2jt6qc/En3Pn80wmFyYS3lRDuVFucyaUsDkwhymFOZQduy9IIfJhTmUFuRQWpBNdqZGNss7xgx3M3sSuBooN7Mm4K+AbAB3/7/AWuAmoAHoBT41XsWKTCTDUaels5/GI700He2j8WjsvSn+3tzRz1D03X0hBTmZTC/OY2pxLhdVlzJ1Ui4V8Vd5UexVMSmXsoIcnUXLGUlktMyKMT534PNJq0hkAunoi9B4pJe3Rrwa46/97X3vuag4rTiXqrICLp5VRmVpPpVl+ZxTmk9laT4zSvKYlJcd0J5Iugnskb8iE0Xv4BB7DvXEXm3x98Ox9/beyLvaTi7Moaosn0WVJSxfPIOqyflUlRVQNbmAc0rzyM3KDGgvRN5N4S5po7M/Ql1zFzubu9jV0sXuth52t3VzoKP/Xe1mlORRM6WQGxfPYHZ5AdWTC6ieXEjV5HydeUtoKNwl5bg7LZ0DvLG/gzf2d7D9QAc7Dnaxv73v7TaTcrM4t6KQS86dwpyKQs6tKGJ2eSE1UwrJz9HZt4Sfwl1C73D3AFuaOtjc1M6Wpligt3UNALFRJ3Mqirh4Vhl3XFrNgumTWDC9mBkleRonLSlN4S6hMjQcZWdzFxv2HmHj3qNsamx/+4zcDM6rKOIDc8tZUlnC780s4fwZxRTk6DCX9KOjXia03sEhNr3VzqvxMH/9raP0DA4DUFmaz4XVpXzi8lksmVnK4soSinJ1SIuAwl0mmJ6BIdbvPszLbx5mw76jbNvfwVDUMYP50yZx68Uzqa2ZTO2sMs4pzQ+6XJEJS+EugXJ36lq6+FVdG7+qb2Pj3qMMDkfJycrgwpmlfO6qc6mtmczFs8oo1kgVkYQp3OWs6xkY4qWGQzxf18YLda0cjA9FnD9tEp+8ooar5lVw8awy8rI1akXkdCnc5aw40N7HL3a08Oz2Fl7ZfYTB4ShFuVm8/7xy7r6ugqvmVzCjRN0sIsmicJdx4e7Ut3TzzLZmntnewhv7OwA4t7yQP7psFtcumEptzWRysvSwK5HxoHCXpIlGndcbj7JuWwvrtjWz73AvAEurS/ny8gVcv3Aa500tCrhKkfSgcJcz4u68tu8oP960n2e2tdDaNUB2pnHZnHJWXnku158/janFeUGXKZJ2FO5yWvYc6uHff9fEjzcd4K0jveRlZ3Dtgql8aNF0rlkwVSNbRAKmcJeEdfRGWLPlAD96rYlNje2YwRVzyvnT6+ayfPF03UAkMoHoX6Oc1NBwlBd3tfGj1/bz7PYWBoejzJ82iftuXMAtF1YyvURdLiITkcJdjqu1s58nX23kyVfformzn7KCbD52STW3XTyTRecU66FbIhOcwl3e5u68uucIT6zfx8+3NjMUdT4wt5y/vnkR1y6YqmGLIiGicBfcnd++eZhvPFPH62+1U5yXxScur+GOS6o5t0JDF0XCSOGe5l7bd4SH1tWxfvcRZpTk8T8/sphbl87UhBUiIadwT1PbDnTw0Lo6Xqhro7woh7/6TwtZsaxaz3MRSREK9zSz91APf/tsPf+x+QAl+dl8efkCPnH5LE1oIZJi9C86TbR29vOdX+5i9auNZGdm8Plr5rDyyjmU5OtmI5FUpHBPcf2RYVa9uJvvvvAmkeEoK5ZV84Vrz9MjAURSnMI9Rbk7P9vazFd/uoP97X3cuHg6X16+gJrywqBLE5GzQOGegnY2d/I3a7bz8u7DLJg+iX/57CVcPqc86LJE5CxSuKeQjr4I33q2nsdf3ktxfjZf+chiVryviqxM3Xwkkm4U7ikgGnV+9LsmHvz5Tg73DHLHJdV88Yb5lBbkBF2aiARE4R5y2w508D9+vJXfvdXO0upSHvvUMhZXlgRdlogETOEeUtGo8+hLe3jw5zspzsvmoduWcOvSmWRk6IFeIpJguJvZcuDbQCbwPXf/2qjPq4EfAKXxNve6+9ok1ypxh7sH+OIPN/N8XRs3LJzG129boi4YEXmXMcPdzDKBh4HrgSZgg5mtcfftI5r9d+Apd/+umS0E1gI141Bv2vvtm4f4s9WbaO+L8MAti7jz0ll6/K6IvEciZ+7LgAZ33w1gZquBW4CR4e5AcXy5BDiQzCIlNmnGd57bxd8938Ds8kIe+9QyFp5TPPYfFJG0lEi4VwKNI9abgEtGtflr4Bkz+wJQCHwwKdUJAAc7+rj7yU28uvcIt108kwduWaRnwYjISSUrIVYAj7n735rZZcATZrbY3aMjG5nZSmAlQHV1dZK+OrX9YnsLX3x6M5GhKN/6wwv4/YtmBl2SiIRAIuG+H6gasT4zvm2kzwDLAdz9ZTPLA8qB1pGN3H0VsAqgtrbWT7PmtDAwNMyDP6vj0Zf2sOicYv5uxUWaOENEEpZIuG8A5prZbGKhfjvwsVFt3gKuAx4zs/OBPKAtmYWmk+6BIVY+vpHfvnmYT15ew303LSA3S89ZF5HEjRnu7j5kZncB64gNc3zU3beZ2QPARndfA9wDPGJm/5XYxdVPurvOzE9De+8gn/zHDbyxv4Nv/sEFfHSpumFE5NQl1OceH7O+dtS2+0csbweuSG5p6ae1s587v/8qew718N07lnLDoulBlyQiIaUhFxNE45Fe7vjeKxzqHuCxT72Py8/TUxxF5PQp3CeAhtZu7vjeevojUf75jy/houqyoEsSkZBTuAdsV0sXKx55BYCnPncZ86dPCrgiEUkFCvcA1bd08bFH1mNmPPnZSzlvqoY6ikhyaBaHgNQ1d7Fi1XoyzFi9UsEuIsmlcA/AzuZOVjyynqzMWLDP0c1JIpJk6pY5y/Ye6uFjj7xCTmYGT668lNmasFpExoHO3M+inoEhVj6xkai7gl1ExpXC/Sxxd/7i6c00tHbzf1YsVbCLyLhSuJ8lf//Cm6x9o5l7b1zA++fqBiURGV8K97PghbpWvvFMHTdfcA6f/cC5QZcjImlA4T7O9h7q4U+ffJ0F04t58NYlmhJPRM4Khfs46o8M87knXiMjw1h158Xk5+ixvSJydmgo5Dj66k93UNfSxQ8+vYyqyQVBlyMiaURn7uPk2e0tPLF+H5/9wGyumlcRdDkikmYU7uOgpbOfLz29mYUzivnih+YHXY6IpCGFe5JFo849T22mLzLMd1ZcpOnxRCQQCvck+/5v9vCbhkPc/+FFehiYiARG4Z5EW/d38PV1O/nQommsWFYVdDkiksYU7knSHxnm7tWvM7kwh699VOPZRSRYGgqZJN9YV8ebbT08/ulllBXmBF2OiKQ5nbknwat7jvD9l/bw8UuruVLDHkVkAlC4n6GegSG++MPNVJUVcN+N5wddjogIoG6ZM/a1n+2k8Wgv/7ryMgpz9dcpIhODztzPwK93tfHE+n388ftns2z25KDLERF5m8L9NHX2R/jS01uYU1HIPTfoLlQRmVjUj3Ca/tfanbR2DfCjP7mcvGzdhSoiE4vO3E/D1v0drN7wFp+6vIYLq0qDLkdE5D0U7qfI3fnKT7ZTVpDDF66bG3Q5IiLHpXA/RT/f2swre45wzw3zKMnPDrocEZHjUrifgv7IMF9du4MF0yfxh7V6doyITFwJhbuZLTezOjNrMLN7T9DmD8xsu5ltM7N/SW6ZE8OjL+2h6Wgf9394IVmZ+r0oIhPXmKNlzCwTeBi4HmgCNpjZGnffPqLNXOA+4Ap3P2pmU8er4KC0dvbz8C8buGHhNC4/rzzockRETiqR089lQIO773b3QWA1cMuoNp8FHnb3owDu3prcMoP30Lo6Boej/Leb9IgBEZn4Egn3SqBxxHpTfNtI84B5ZvaSma03s+XH+0FmttLMNprZxra2ttOrOABb93fw9O+a+PQVs6kpLwy6HBGRMSWr4zgLmAtcDawAHjGz9wwAd/dV7l7r7rUVFeF5euK3nq2nND+bz197XtCliIgkJJFw3w+MHBoyM75tpCZgjbtH3H0PUE8s7ENv+4FOntvZyqevmE1xnoY+ikg4JBLuG4C5ZjbbzHKA24E1o9r8mNhZO2ZWTqybZncS6wzM37/QQFFuFn90WU3QpYiIJGzMcHf3IeAuYB2wA3jK3beZ2QNmdnO82TrgsJltB54H/sLdD49X0WfL7rZufvrGQe68bBYlBTprF5HwSOjBYe6+Flg7atv9I5Yd+PP4K2X8w692k5OZwaevmB10KSIip0R34pzAgfY+/u31Jm5/XxUVk3KDLkdE5JQo3E9g1Yu7cYeVV80JuhQRkVOmcD+OQ90DrN7wFr9/USWVpflBlyMicsoU7sfx6G/2MDAU5b9crbN2EQknhfsoHX0Rnnh5Hzf93gzmVBQFXY6IyGlRuI/y1IZGugaG+BP1tYtIiCncR4hGnX96ZR/vqyljcWVJ0OWIiJw2hfsIL+5qY9/hXu7U3agiEnIK9xGeeHkf5UW5LF80PehSRETOiMI9rvFIL7+sa2XFsipysvTXIiLhphSL+6dX9pFhxscuqQ66FBGRM6ZwJzbx9VMbGrn+/GnMKNFNSyISfgp34KdbDnK0N8Kdl80KuhQRkaRQuAOPr9/HnIpCLp8zJehSRESSIu3DfUtTO5sb27nz0lmYWdDliIgkRdqH++Mv76MgJ5OPXjwz6FJERJImrcO9oy/Cf2w+wEcuqtT8qCKSUtI63F+sb2NgKMqtSyuDLkVEJKnSOtyfr2ultCCbC6vKgi5FRCSp0jbco1Hnxfo2rpxbQWaGLqSKSGpJ23DfeqCDQ92DXLOgIuhSRESSLm3D/fmdbZjBlXMV7iKSetI23F+ob2XJzFKmFOUGXYqISNKlZbgf6RlkU2M718zXWbuIpKa0DPdf72rDHa6ePzXoUkRExkVahvvzO1uZUpjDEk2lJyIpKu3CfTjq/Kq+javmVZChIZAikqLSLty3NLVztDfCVepvF5EUlnbh/nxdGxkaAikiKS7twv2FulYuqi6jrDAn6FJERMZNQuFuZsvNrM7MGszs3pO0u9XM3Mxqk1di8rR1DbClqYOr5+msXURS25jhbmaZwMPAjcBCYIWZLTxOu0nA3cAryS4yWV6sbwPgmgUaAikiqS2RM/dlQIO773b3QWA1cMtx2n0FeBDoT2J9SfVCfRvlRbksnFEcdCkiIuMqkXCvBBpHrDfFt73NzJYCVe7+0yTWllTRqPPrXRoCKSLp4YwvqJpZBvBN4J4E2q40s41mtrGtre1Mv/qU7DncQ3tvhGWz9ex2EUl9iYT7fqBqxPrM+LZjJgGLgRfMbC9wKbDmeBdV3X2Vu9e6e21Fxdm9qLmlqR2AC6pKz+r3iogEIZFw3wDMNbPZZpYD3A6sOfahu3e4e7m717h7DbAeuNndN45Lxadpc2MH+dmZnFdRFHQpIiLjbsxwd/ch4C5gHbADeMrdt5nZA2Z283gXmCxbmtpZXFlMVmbaDe0XkTSUlUgjd18LrB217f4TtL36zMtKrshwlG0HOvn4pbOCLkVE5KxIi9PY+pYuBoaiLJmpp0CKSHpIi3Df0tQBwAUzdTFVRNJDWoT75sZ2SvKzmTWlIOhSRETOivQI96YOlswswUw3L4lIekj5cO8bHKa+pUtdMiKSVlI+3Lcf7GA46rqYKiJpJeXDfXNj/GKq7kwVkTSS8uG+pamdacW5TCvOC7oUEZGzJg3CvYMl6m8XkTST0uHe0Rdh96EeLlB/u4ikmZQO9zea1N8uIukppcN9c/wxv0sqFe4ikl5SOty3NLVTM6WAkoLsoEsRETmrUjzcdTFVRNJTyoZ7a1c/Bzv6dfOSiKSllA33Lbp5SUTSWOqGe1M7GQaLzikOuhQRkbMuZcN9c1MH86ZNoiAnocmmRERSSsqG+/aDnSw6R/3tIpKeUjLcD3cP0NY1wPkzJgVdiohIIFIy3OuauwBYMF397SKSnlIy3HccC3eduYtImkrJcK9r7qS8KIfyotygSxERCURKhvvO5i51yYhIWku5cB+OOnXNXcyfri4ZEUlfKRfu+w73MDAUZYHCXUTSWMqF+06NlBERSc1wzzCYO60o6FJERAKTeuF+sJOa8kLysjODLkVEJDApF+51LV2cry4ZEUlzKRXuPQND7Dvcq5EyIpL2Egp3M1tuZnVm1mBm9x7n8z83s+1mtsXMnjOzWckvdWz1LccupircRSS9jRnuZpYJPAzcCCwEVpjZwlHNXgdq3X0J8DTw9WQXmohjI2XOn6FuGRFJb4mcuS8DGtx9t7sPAquBW0Y2cPfn3b03vroemJncMhNT19xFYU4mlaX5QXy9iMiEkUi4VwKNI9ab4ttO5DPAz473gZmtNLONZraxra0t8SoTtONgJ/OnTyIjw5L+s0VEwiSpF1TN7ONALfDQ8T5391XuXuvutRUVFcn8atydupYu5mukjIgIicxBtx+oGrE+M77tXczsg8BfAle5+0ByyktcS+cA7b0RTdAhIkJiZ+4bgLlmNtvMcoDbgTUjG5jZRcA/ADe7e2vyyxzbjuZOAOZPU7iLiIwZ7u4+BNwFrAN2AE+5+zYze8DMbo43ewgoAn5oZpvMbM0Jfty40exLIiLvSKRbBndfC6wdte3+EcsfTHJdp2znwU5mlORRUpAddCkiIoFLmTtUYxN0qEtGRARSJNwjw1HebOvWSBkRkbiUCPfdbT1Ehl0jZURE4lIi3HfGR8roYqqISEyKhHsX2ZnGuRWFQZciIjIhpES41zV3MaeiiOzMlNgdEZEzlhJpWN/SxVzdvCQi8rbQh3vPwBBNR/uYN1VzpoqIHBP6cG9o7QbQmbuIyAihD/djsy/Nm6YzdxGRY0If7rtau8nJzKB6ckHQpYiITBihD/f6li7OrSgkSyNlRETeFvpE3NXSzTz1t4uIvEuow717YIj97X3qbxcRGSXU4a6RMiIixxfqcH9npIzCXURkpFCH+66WLnKzNFJGRGS0UId7fUs3cyqKyMywoEsREZlQQh3uu1q6dDFVROQ4QhvuXf0RDnT062KqiMhxhDbcd8VHyuhiqojIe4U33PVMGRGREwptuNe3dJOXnUFVmUbKiIiMFuJw7+K8qUVkaKSMiMh7hDbcd7V0M2+q+ttFRI4nlOHe0RehuVMjZURETiSU4d7QGruYOldT64mIHFcow72+RcMgRUROJqTh3kV+diYzy/KDLkVEZEIKZbjvaunWSBkRkZNIKNzNbLmZ1ZlZg5nde5zPc83sX+Ofv2JmNckudKT6li7m6uYlEZETGjPczSwTeBi4EVgIrDCzhaOafQY46u7nAd8CHkx2ocd09EZo7RpQf7uIyEkkcua+DGhw993uPgisBm4Z1eYW4Afx5aeB68xsXPpM6lv12AERkbEkEu6VQOOI9ab4tuO2cfchoAOYkowCRzs2+9Jc3cAkInJCZ/WCqpmtNLONZraxra3ttH5GRVEu1y+cRmWpRsqIiJxIVgJt9gNVI9Znxrcdr02TmWUBJcDh0T/I3VcBqwBqa2v9dAq+YdF0blg0/XT+qIhI2kjkzH0DMNfMZptZDnA7sGZUmzXAJ+LLtwG/dPfTCm8RETlzY565u/uQmd0FrAMygUfdfZuZPQBsdPc1wPeBJ8ysAThC7BeAiIgEJJFuGdx9LbB21Lb7Ryz3A/85uaWJiMjpCuUdqiIicnIKdxGRFKRwFxFJQQp3EZEUpHAXEUlBFtRwdDNrA/ad5h8vBw4lsZwghH0fVH/wwr4Pqv/0zHL3irEaBRbuZ8LMNrp7bdB1nImw74PqD17Y90H1jy91y4iIpCCFu4hICgpruK8KuoAkCPs+qP7ghX0fVP84CmWfu4iInFxYz9xFROQkQhfuY03WPdGY2aNm1mpmW0dsm2xmz5rZrvh7WZA1noyZVZnZ82a23cy2mdnd8e1h2oc8M3vVzDbH9+Fv4ttnxyd0b4hP8J4TdK0nY2aZZva6mf0kvh6a+s1sr5m9YWabzGxjfFtojiEAMys1s6fNbKeZ7TCzyybyPoQq3BOcrHuieQxYPmrbvcBz7j4XeC6+PlENAfe4+0LgUuDz8b/zMO3DAHCtu18AXAgsN7NLiU3k/q34xO5HiU30PpHdDewYsR62+q9x9wtHDB8M0zEE8G3g5+6+ALiA2H+LibsP7h6aF3AZsG7E+n3AfUHXlUDdNcDWEet1wIz48gygLugaT2Ff/h9wfVj3ASgAfgdcQuwGlKz49ncdWxPtRWwGtOeAa4GfABay+vcC5aO2heYYIja73B7i1ynDsA+hOnMnscm6w2Caux+MLzcD04IsJlFmVgNcBLxCyPYh3qWxCWgFngXeBNo9NqE7TPxj6X8DXwKi8fUphKt+B54xs9fMbGV8W5iOodlAG/CP8a6x75lZIRN4H8IW7inHY7/yJ/yQJTMrAn4E/Jm7d478LAz74O7D7n4hsTPgZcCCgEtKmJl9GGh199eCruUMvN/dlxLrUv28mV058sMQHENZwFLgu+5+EdDDqC6YibYPYQv3RCbrDoMWM5sBEH9vDbiekzKzbGLB/s/u/m/xzaHah2PcvR14nlg3Rml8QneY2MfSFcDNZrYXWE2sa+bbhKd+3H1//L0V+Hdiv2DDdAw1AU3u/kp8/WliYT9h9yFs4Z7IZN1hMHJC8U8Q68eekMzMiM2Ru8PdvzniozDtQ4WZlcaX84ldM9hBLORvizebsPvg7ve5+0x3ryF2zP/S3e8gJPWbWaGZTTq2DNwAbCVEx5C7NwONZjY/vuk6YDsTeR+C7vQ/jQsbNwH1xPpM/zLoehKo90ngIBAh9tv/M8T6S58DdgG/ACYHXedJ6n8/sf/V3AJsir9uCtk+LAFej+/DVuD++PZzgVeBBuCHQG7QtSawL1cDPwlT/fE6N8df2479uw3TMRSv90JgY/w4+jFQNpH3QXeoioikoLB1y4iISAIU7iIiKUjhLiKSghTuIiIpSOEuIpKCFO4iIilI4S4ikoIU7iIiKej/A5gUHi3DQfx7AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x10a4d99e8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.plot([i for i in range(X_train.shape[1])], \n",
    "         [np.sum(pca.explained_variance_ratio_[:i]) for i in range(X_train.shape[1])])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### scikit-learn中初始化PCA时,n_components，如果是1个小于1的数，表示需要解释多少的数据（一个小于等于1的百分比数）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n  svd_solver='auto', tol=0.0, whiten=False)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pca = PCA(0.95)\n",
    "pca.fit(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.95"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pca.n_components"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "28"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pca.n_components_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train_reduction = pca.transform(X_train)\n",
    "X_test_reduction = pca.transform(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 3.12 ms, sys: 647 µs, total: 3.77 ms\nWall time: 3.13 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "knn_clf = KNeighborsClassifier()\n",
    "knn_clf.fit(X_train_reduction, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.97999999999999998"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "knn_clf.score(X_test_reduction, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
